/* Transmission line exercise: divide TenneT in North and South region 	*/

* (1) Available production capacity N-S region 
* (2) Split load / production in N-S region
* (3) Identify lambdas in N-S region
* (4) Regressions on capacity imbalance (Table 2)


clear all
cd "${PATH}/data"
set more off


use "supply_capacity_TenneT.dta", clear
***************************************************
** (1)Capacity factor by technology others than wind and solar 

* Use average capacity factors 2015/16 EIA, SOURCES: 
** https://www.eia.gov/electricity/monthly/epm_table_grapher.php?t=epmt_6_07_b
** https://www.eia.gov/electricity/monthly/epm_table_grapher.php?t=epmt_6_07_a
gen 	capacity_factor = .72 if technology == "geothermal"
replace capacity_factor = .37 if technology =="Hydro"
replace capacity_factor = .92 if technology =="Nuclear"
replace capacity_factor = .63 if technology =="biomass"
replace capacity_factor = .63 if technology =="Waste"
replace capacity_factor = .53 if technology =="Hard coal" | technology =="Lignite"
replace capacity_factor = .55 if technology =="Natural gas"
replace capacity_factor = .13 if technology =="Oil"
replace capacity_factor = .5 if technology =="Other fuels" // no info


// Use realized (time-varying) capacity factor (CF) for intermittent (renewable) technologies : wind, solar, wind offshore as always inframarginal!

preserve
use data_15min.dta, clear 
keep if TSO == "TenneT"
gen date_day = dofc(time_utc)
gen date_hour = hh(time_utc)
keep date_day date_hour wind_gen wind_onshore_gen wind_offshore_gen solar_gen 

// Production data in MW, i.e. MW produced during 15min interval, average to get MWh (hourly data)
collapse (mean) *_gen, by(date_day date_hour)
tempfile temp2
save `temp2' 
restore


** MERGE SOLAR / WIND REALIZED Capacity factors ** 
merge m:1 date_day date_hour  using `temp2'
drop _merge // all merged

// Generate total RES capacity in TenneT (N and S joint)
bysort technology date_day date_hour: egen capacity_tot = total(capacity) 

// Replace CF with realized data
replace capacity_factor = solar_gen / capacity_tot if technology == "solar"
replace capacity_factor = wind_onshore_gen / capacity_tot if technology == "wind"
replace capacity_factor = wind_offshore_gen / capacity_tot if technology == "wind offshore" 


// drop variables at TSO level
drop *_gen capacity_tot
sort split technology date_day date_hour
order split technology date_day date_hour


** AVAILABLE CAPACITY FOR CONVENTIONAL FUELS ** 
gen cap_available = (capacity - capacity_missing) * capacity_factor if (technology != "solar" & technology != "wind" & technology != "wind offshore")

** SOME VALUES ARE NEGATIVE **
replace cap_available = 0 if cap_available < 0 

** AVAILABLE CAPACITY FOR RES
replace cap_available = capacity * capacity_factor if (technology == "solar" | technology == "wind" | technology == "wind offshore")
// do not take into account wind plant "unavailability" in this case, as part of hourly production profile

** SUMMARY STATS ON AVAILABLE CAPACITY 
version 15: table technology split, c(mean cap_available sd cap_available) 



***********************************************************
** COST ELEMENTS

preserve
use lambda_15min, clear

keep if TSO == "TenneT"
keep cost_MWH_* emission_MWH_crude_oil emission_MWH_coal ///
	emission_MWH_heating_oil emission_MWH_ng dAS_dR date hour 

collapse cost_MWH* emission_* dAS_dR, by(date hour)
rename date date_day
rename hour date_hour

tempfile temp5
save `temp5'
restore

merge m:1 date_day date_hour using `temp5'
drop _merge

// CREATE MC COLUMN ACCORDING TO THE COLUMN TECHNOLOGY
gen marginal_cost = 0 
replace marginal_cost = cost_MWH_coal if technology == "Hard coal" | technology == "Other fuels"
replace marginal_cost = cost_MWH_ng if technology == "Natural gas" 
replace marginal_cost = cost_MWH_crude_oil if technology == "Oil"
replace marginal_cost = 11.3 if technology == "Lignite" 
replace marginal_cost = 5.1 if technology == "Nuclear"
replace marginal_cost = 6 if technology == "Waste" | technology == "biomass"

drop cost_*
gen marginal_emissions = 0 
replace marginal_emissions = emission_MWH_coal if technology == "Hard coal" | technology == "Other fuels"
replace marginal_emissions = 1.148 if technology == "Lignite" 
replace marginal_emissions = emission_MWH_ng if technology == "Natural gas" 
replace marginal_emissions = emission_MWH_crude_oil if technology == "Oil"

sort split date_day date_hour marginal_cost //MC_ordering


// collapse (sum) cap_available, by(split date_day date_hour MC_ordering)
bysort split date_day date_hour (marginal_cost): gen cum_cap_available = sum(cap_available) 
* DISTINGUISH BETWEEN TOTAL and cumulative
bysort split date_day date_hour (marginal_cost): egen tot_cap_available = total(cap_available)


*******************************************************************************
// Construct step function for TenneT_N and TenneT_S

gen type = 0 if technology =="Hydro" | technology =="wind" | technology =="wind offshore" | technology =="solar" | technology =="geothermal"
replace type = 1 if technology =="Nuclear"
replace type = 2 if technology =="biomass" | technology =="Waste"
replace type = 3 if technology =="Lignite"
replace type = 4 if technology =="Hard coal" | technology =="Other fuels"
replace type = 5 if technology =="Natural gas"
replace type = 6 if technology =="Oil"

replace technology = "RES" if type ==0
replace technology = "Nuclear" if type ==1
replace technology = "Biomass" if type ==2
replace technology = "Lignite" if type ==3
replace technology = "Hard Coal" if type ==4
replace technology = "Natural Gas" if type ==5
replace technology = "Oil" if type ==6


// Recalculate cum_cap available with aggregation of types
drop cum_cap_available tot_cap_available
bysort split date_day date_hour type: egen tot_cap_available_type = total(cap_available) 

collapse (mean) tot_cap_available_type marginal_cost marginal_emissions (first) technology, by(split type date_day date_hour)

sort split date_day date_hour type
by split date_day date_hour (type): gen cum_cap_available = sum(tot_cap_available_type) 
drop tot_cap_available_type


**************************************************
** (2) Split "load" by population (alternatively GDP) in load North and South (data from "regionalstatistik.de , 2015)

*************
preserve

** DATA on TSO-county merge: 
use "county_TSO_covar_powerplants.dta", clear
keep ags_countycode county_name TSO state
duplicates drop ags_countycode, force
ren ags_countycode destatis_id 
replace destatis_id = destatis_id  / 1000
ren county_name county_name_m
tempfile temp3
save `temp3' 


** SPLIT LOAD EITHER BY GDP OR BY POPULATION
/** DATA ON LOCAL GDP 
import excel using "./Covariates_buildings_GDP/RegionalGDP_82111-01-05-4.xlsx", clear cellrange(A8:M545)
ren A destatis_id
ren B county_name
ren C GDP
ren D GDP_worker
ren E GDP_employee
drop F-M
drop if strlen(destatis_id) < 5 | strlen(destatis_id) > 5
destring destatis_id, replace
destring GDP*, force replace // missing "-"
distinct destatis_id if !missing(GDP)


** DATA on Population (31.12.2014)
import excel using "./Covariates_buildings_GDP/RegionalPopulation_12411-01-01-4.xlsx", clear cellrange(A8:C544)
ren A destatis_id
ren B county_name
ren C population

drop if strlen(destatis_id) < 5 | strlen(destatis_id) > 5
destring destatis_id, replace
destring population, force replace // missing "-"
distinct destatis_id if !missing(population)

merge 1:1 destatis_id using `temp3'
// non merge: city states of Hamburg and Berlin (both 50HZ, not of interest)
keep if _merge == 3
drop _merge 
drop county_name_m // merge OK
keep if TSO == "TenneT"
gen split = "South" if state == "Bayern"
replace split = "North" if missing(split)


/*
** GDP
egen tot_GDP = total(GDP) // all TenneT
bysort split: egen tot_GDP_reg = total(GDP) // North vs. South 
gen GDP_share = tot_GDP_reg / tot_GDP
duplicates drop split , force
keep split tot_GDP* GDP_share
tabstat GDP_share, by(split) s(mean)
*/


** POPULATION
egen tot_pop = total(population) // all TenneT
bysort split: egen tot_pop_reg = total(population) // North vs. South 

gen pop_share = tot_pop_reg / tot_pop
duplicates drop split , force
keep split tot_pop* pop_share
tabstat pop_share, by(split) s(mean)
*/


// THESE ARE THE SHARES TO SPLIT LOAD: GDP based
*local north_share .5320487
*local south_share .4679513


// THESE ARE THE SHARES TO SPLIT LOAD: Population based
local north_share .5892228
local south_share .4107772



// FOR SOLAR PRODUCTION, USE OFFICIAL INFORMATION FROM TENNET FOR 2015 AND 2016
** Reported for individual states, i.e., Bavaria: 
* Source: https://www.tennet.eu/electricity-market/transparency-pages/transparency-germany/network-figures/actual-and-forecast-solar-energy-feed-in/bayern/ 
** FILE: "TenneT_Bavaria/Tennet_solar_South.dta"


// PREPARE LOAD DATA TO BE MERGED WITH THE MC DATA
use "data_15min", clear
gen renewables = solar_gen
keep if renewables > 0  // keep only positive solar output hours in line with other file 

keep TSO time_utc load renewables
keep if TSO == "TenneT"

gen date_day = dofc(time_utc)
format date %td
gen date_hour = hh(time_utc)

// Expand to generate N, S observations // 
expand 2
sort time_utc
gen split = cond(mod(_n, 2) == 1, "North", "South")

** Load split in line with shares above 
gen load_split = cond(split == "North", `north_share' * load, `south_share' * load)

** Production split in line with "observed production Bavaria"
merge m:1 time_utc using  "Tennet_solar_South.dta"
keep if _merge == 3 // merge ==2, hours without positive solar production
drop solar_fc diff _merge
ren solar solar_South

gen renewables_split = solar_South if split == "South"
replace renewables_split = renewables - solar_South  if split =="North"
replace renewables_split = 0 if renewables_split < 0 
// p1: is .1-> includes some negative values; replace with 0

collapse load renewables load_split renewables_split (first) time_utc TSO, by(split date_day date_hour)
// recall definition of total load  in ENTSO-E (including losses without stored energy) = Net Generation – Exports + Imports – Absorbed Energy !!
tempfile temp4
save `temp4' 
**************
restore


// MERGE WITH CUMUL. CAPACITY AVAILABLE
merge m:1 split date_day date_hour using `temp4' 
** merge ==1 hours without solar production

keep if !missing(load_split)
drop _merge


*********************************
** (3) Identify lambdas + run regressions 
* -> Define marginal unit at 15min interval
** Supply and Demand: hourly data


** IDENTIFY LAMBDAS
/* Note: There are many instances in which there is no intersection between supply and demand in Bavaria (TenneT-S). Bavaria has been net-importer during this time period (even more so after shutdown of nuclear plans in 2016)

* Solution: keep installed capacity as of "beginning" of our sample (1/1/2015), i.e., do not assume shutdown of nuclear plants to there is sufficient capacity generated within Bavaria. 
*/

sort split date_day date_hour type
gen enough_supply = 1 if load_split < cum_cap_available


// DEFINE LAMBDAS + keep track of lambdas in case more solar gets reallocated // 
bysort split date_day date_hour (type) : gen temp = 1 if (enough_supply ==1 & _n ==1) | (enough_supply ==1 & enough_supply[_n-1] ==. &  _n !=1) 

** Define lambdas for "benchmark case" **
gen lambda = marginal_cost if temp == 1
gen lambda_emissions = marginal_emissions if temp == 1
gen lambda_technology = technology if temp == 1


** Define steps
tab type if temp ==1 // all types are used 


// Expand dataset to contain all MC and ME as variables
preserve 
keep split date_day date_hour type marginal_cost marginal_emissions cum_cap_available

ren marginal_cost lambda_mc_step_
ren marginal_emissions lambda_me_step_
ren cum_cap_available lambda_step_  

** DEFINE MARGINAL TECHNOLOGY in benchmark (similar to main analysis) + RESHAPE DATA ** 
reshape wide  lambda_mc_step_ lambda_me_step_  lambda_step_, i(split date_day date_hour) j(type)
tempfile temp1
save `temp1'
restore

merge m:1 split date_day date_hour using `temp1'
drop _merge

drop if lambda == . // keep only marginal technology 
drop enough_supply temp technology marginal_cost marginal_emissions

// NOTE: "current" intersection with load is saved in "type",i.e. type ==1 means that marginal technology is nuclear. In line with main file, we allow for lower MB in case MORE solar (compared to the benchmark case) gets located in a given TSO
replace TSO = "TenneT_North" if split =="North"
replace TSO = "TenneT_South" if split =="South"
drop cum_cap_available load renewables time_utc
	
order TSO date_day date_hour load_split renewables_split type lambda lambda_emissions lambda_technology  lambda_step_* lambda_mc_step_* lambda_me_step_*
	
// Reconstruct the steps in the same way they are created for the main file (ENTSO-E data)
** Replace lambda steps as "step length" of each technology **
ren lambda_step_* cum_lambda_step_* 

gen lambda_step_6 = cum_lambda_step_6 - cum_lambda_step_5
gen lambda_step_5 = cum_lambda_step_5 - cum_lambda_step_4
gen lambda_step_4 = cum_lambda_step_4 - cum_lambda_step_3
gen lambda_step_3 = cum_lambda_step_3 - cum_lambda_step_2
gen lambda_step_2 = cum_lambda_step_2 - cum_lambda_step_1
gen lambda_step_1 = cum_lambda_step_1 - cum_lambda_step_0
gen lambda_step_0 = cum_lambda_step_0

mvencode lambda_step_*, mv(0) override

* Check which technology is the marginal technology, replace others with zero
forvalues t = 1/6 {
local n = `t' -1
replace lambda_step_`t' = 0 if type == `n'
replace lambda_step_`t' = 0 if lambda_step_`n' == 0
}	

** Construct exact position on supply "step" for marginal technology
forvalues t = 0/6 {
replace cum_lambda_step_`t' = cum_lambda_step_`t' - load_split

replace lambda_step_`t' = cum_lambda_step_`t' if type == `t' 
}	

drop cum_lambda_step_*
order TSO date_day date_hour load_split renewables_split type lambda lambda_emissions lambda_technology  lambda_step_0 lambda_step_1 /// 
lambda_step_2 lambda_step_3 lambda_step_4 lambda_step_5 lambda_step_6 lambda_mc_step_* lambda_me_step_*


*
// Copy Lambdas-South + solar production to North in separate variable
preserve
keep if TSO =="TenneT_South"
keep TSO date_day date_hour lambda*
replace TSO = "TenneT_North" // for merge
drop lambda_technology
ren lambda_* lambda_S_*	
ren lambda lambda_S
drop lambda_S_me_* // not relevant
tempfile temp6
save `temp6'
restore

merge m:1 TSO date_day date_hour using `temp6'
drop if _merge == 2
drop _merge 

save "TenneT_split.dta", replace	
****************************************


* (4) Regressions on capacity imbalance
use  "TenneT_split.dta", clear
		
sum lambda if split == "North",d
sum lambda if split == "South",d
tab lambda_technology if split == "South", sort
tab lambda_technology if split == "North", sort


** RUN REGRESSIONS FOR THOSE TIME INTERVALS WHERE THE LAMBDAS ARE DIFFERENT IN EACH REGION
gen split_num = 1 if split == "North"
replace split_num = 2 if split == "South"

tab split_num
gen double time_utc = dhms(date_day,date_hour,0,0)
format time_utc %tc

xtset time_utc split_num 
drop TSO type lambda_step_*
reshape wide lambda* load_split renewables_split split , i(time_utc) j(split_num)

// FOR THE FIXED EFFECTS:
gen month = month(date_day)
gen hour = hh(time_utc)
gen dayweek = dow(date_day)
gen year = yofd(date_day)


// BY INCREASING THE REQUIRED SIZE OF THE LAMBDA GAP, DELTA_K DECREASES

gen residual_load1 = renewables_split1 - load_split1
gen residual_load2 = renewables_split2 - load_split2

gen diff_lambda = lambda1 - lambda2
lab var diff_lambda "{&lambda}{subscript:N} - {&lambda}{subscript:S}"

hist diff_lambda,  xlabel(, grid) color(blue%20) //////
	graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1mono)	
graph export "../figures/hist_diff_lambdas.pdf", replace	


est clear

forval gap = 2(3)8 {
	// NORTH REGRESSION
	// \lambda_N = a_N + b_N (R_N - Q_N) + c_N Q_S + FEs 
	eststo: qui areg lambda1 residual_load1 load_split2 i.month#i.hour if abs(lambda1 - lambda2) > `gap', a(date_day) vce(cluster date_day)
	
	// SOUTH REGRESSION
	// \lambda_S = a_S + b_S (R_S - Q_S) + c_S Q_N + FEs
	eststo: qui areg lambda2 residual_load2 load_split1 i.month#i.hour  if abs(lambda1 - lambda2) > `gap', a(date_day)  vce(cluster date_day)
}

esttab,  keep(residual_load1 load_split2 residual_load2 load_split1 _cons)  se scalars(r2 N)
esttab using "../results/table_deltaK.tex", replace ///
keep(residual_load1 load_split2 residual_load2 load_split1) se scalars(r2 N)



// COMPUTE DELTA K (THE SIZE OF THE TRANSMISSION CAPACITY CONSTRAINT FOR EACH HOUR IF THERE IS AN IMBALANCE OF LAMBDAS
local coeff1     -0.000932 // coefficient residual_1 lambda1 (1)
local coeff2     -0.00634  // coefficient residual_2 lambda2 (2)
*di  (`coeff1' - `coeff2') *4000


local gap 2

cap drop deltaK
gen deltaK = abs(lambda1 - lambda2) / (`coeff1' - `coeff2') if abs(lambda1 - lambda2) > `gap'
label var renewables_split2 "Solar Output South"
sum deltaK 



tw scatter deltaK renewables_split2, m(sh) col(navy) ylabel( , grid) ytitle("MW")	///
  graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color)  ///
  || qfit deltaK renewables_split2
  graph export "../figures/deltaK_`gap'dlsmwh.pdf", replace 


  
  
// *************************************************************************** //
// ROBUSTNESS: include load in other TSOs as additional control //

preserve
use "data_15min", clear
gen renewables = solar_gen
keep if renewables > 0  

keep TSO time_utc load renewables
keep if TSO != "TenneT"

gen date_day = dofc(time_utc)
format date %td
gen date_hour = hh(time_utc)

collapse load_= load renewables_ = renewables , by(TSO date_day date_hour)
encode TSO, gen(TSO_num) // 1=50HZ, 2=Amprion, 3=TransnetBW

drop TSO
reshape wide load renewables,i(date_day date_hour) j(TSO_num)
tempfile temp7
save `temp7'
restore

merge m:1 date_day date_hour using `temp7'
drop if _merge == 2
drop _merge 

// RERUN ABOVE REGRESSIONS //
est clear

forval gap = 2(3)8 {
	// NORTH REGRESSION
	// \lambda_N = a_N + b_N (R_N - Q_N) + c_N Q_S + FEs 
	eststo: qui areg lambda1 residual_load1 load_split2 load_1 load_2 load_3 i.month#i.hour if abs(lambda1 - lambda2) > `gap', a(date_day) vce(cluster date_day)
	
	// SOUTH REGRESSION
	// \lambda_S = a_S + b_S (R_S - Q_S) + c_S Q_N + FEs
	eststo: qui areg lambda2 residual_load2 load_split1 load_1 load_2 load_3 i.month#i.hour  if abs(lambda1 - lambda2) > `gap', a(date_day)  vce(cluster date_day)
}

esttab, keep(residual_load1 load_split2 residual_load2 load_split1 load_1 load_2 load_3 _cons) order(residual_load1 load_split2 residual_load2 load_split1 load_1 load_2 load_3 _cons) se scalars(r2 N)

esttab using "../results/table_deltaK_robust.tex", replace ///
keep(residual_load1 load_split2 residual_load2 load_split1 load_1 load_2 load_3 _cons) order(residual_load1 load_split2 residual_load2 load_split1 load_1 load_2 load_3 _cons) se scalars(r2 N)


// COMPUTE DELTA K (THE SIZE OF THE TRANSMISSION CAPACITY CONSTRAINT FOR EACH HOUR IF THERE IS AN IMBALANCE OF LAMBDAS
local coeff1      -0.00135 // coefficient residual_1 lambda1 (1)
local coeff2     -0.00636 // coefficient residual_2 lambda2 (2)
*di  (`coeff1' - `coeff2') *4000

local gap 2

cap drop deltaK
gen deltaK = abs(lambda1 - lambda2) / (`coeff1' - `coeff2') if abs(lambda1 - lambda2) > `gap'
label var renewables_split2 "Solar Output South"
sum deltaK 
// VERY SIMILAR TO MAIN RESULTS IN PAPER //


** Compare coefficients: for main model (Columns 1 and 2 )
di (-0.00135 + 0.000932 ) / sqrt(0.000326^2 + 0.000301^2)
// yields z value of -.942 -> NOT statistically different

di (-0.00636 + 0.00634) /  sqrt(0.000586^2 + 0.000583^2)
		